## packages ##
library(tidyverse)
library(magrittr)
library(GGally)
library(ggplot2)
library(plotly)
# Task 1: point the session at the workshop desktop folder and read in both
# copies of the ld_pop_prism dataset.
setwd(dir = "C:/Users/workshop/Desktop")
# RData copy: restores the objects saved in the file into this session
# (presumably including `fulldata`, which the code below relies on -- TODO confirm).
load(file = "C:/Users/workshop/Desktop/ld_pop_prism.RData")
# CSV copy of the dataset, read with readr.
datacsv <- read_csv(file = "C:/Users/workshop/Desktop/ld_pop_prism.csv")
# Use ggpairs to obtain a 4-by-4 summary plot of precipitation, average temperature, population size, and number of Lyme disease cases.
# List the available columns before choosing which ones to plot.
names(fulldata)
## [1] "state" "county" "cases" "year" "fips" "prcp" "avtemp" "size"
# Pairwise summary plot of the two climate variables, population size and
# case counts.
ggpairs(data = fulldata, columns = c("prcp", "avtemp", "size", "cases"))
# Create two new columns: log10(size) and log10(cases + 1).
# Add log10-transformed versions of population size and case count; the +1
# keeps rows with zero cases finite, since log10(0) is -Inf.
fulldata <- fulldata %>%
  mutate(
    log10_size = log10(size),
    log10_cases = log10(cases + 1)
  )
# Redraw the pairwise summary using the log-scaled columns.
ggpairs(
  data = fulldata,
  columns = c("prcp", "avtemp", "log10_size", "log10_cases")
)
## Why do we add 1 to the number of cases? Because rows with zero cases would give log10(0), which is -Inf in R rather than a finite number.
# Fix the RNG seed so the same 100-row random subset is drawn on every run.
set.seed(222)
fulldata_100 <- sample_n(fulldata, 100)
# Scatterplot of average temperature (y) against precipitation (x) for the subset.
plot_prcp_temp <- ggplot(fulldata_100, aes(x = prcp, y = avtemp)) +
  geom_point()
# Overlay the straight-line (lm) fit; the bare expression prints the plot.
plot_prcp_temp +
  geom_smooth(mapping = aes(x = prcp, y = avtemp), method = "lm")
#Create a linear model (lm) object with a call like myModel <- lm(y ~ x, data = myData) for the
#subsetted data, where y=avtemp and x=prcp. In addition, view the summary with a call along the lines of
#summary(myModel)
# Fit the linear model for the 100-row subset with avtemp as the response (y)
# and prcp as the predictor (x), matching both the task statement and the
# axes of plot_prcp_temp. The formula previously read prcp ~ avtemp, which
# fits the inverse regression, so its slope was not the slope of the plotted
# line.
# NOTE: the console output recorded below was captured from that earlier
# prcp ~ avtemp fit and should be regenerated after re-running this call.
model_prcptemp <- lm(avtemp ~ prcp, data = fulldata_100)
summary(model_prcptemp)
##
## Call:
## lm(formula = prcp ~ avtemp, data = fulldata_100)
##
## Residuals:
## Min 1Q Median 3Q Max
## -842.52 -197.58 0.48 213.84 743.71
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 584.427 80.202 7.287 8.14e-11 ***
## avtemp 29.687 6.007 4.942 3.19e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 292.3 on 98 degrees of freedom
## Multiple R-squared: 0.1995, Adjusted R-squared: 0.1913
## F-statistic: 24.42 on 1 and 98 DF, p-value: 3.19e-06
# Slope estimate: second row of the coefficient table (the predictor),
# "Estimate" column.
coef(summary(model_prcptemp))[2, "Estimate"]
## [1] 29.68683
# p-value for that slope, taken from the "Pr(>|t|)" column of the same table.
coef(summary(model_prcptemp))[2, "Pr(>|t|)"]
## [1] 3.190341e-06
# What is the slope of the line you plotted in Task 5, and is the slope significantly different from 0 (p < 0.05)?
#summary(model_prcptemp)$coefficients[2,1]
#[1]29.68683
#summary(model_prcptemp)$coefficients[2,4]
#[1] 3.190341e-06
# Write a single line of code to generate a ggplot of total population size by year.
# Total population per year: sum `size` within each year, then plot the yearly
# totals as points.
fulldata %>%
  group_by(year) %>%
  summarise(popsize_per_year = sum(size)) %>%
  ggplot(mapping = aes(x = year, y = popsize_per_year)) +
  geom_point()
# Create a data frame called "by_state" from the main data frame that groups by state, and inspect it.
# Group the full data by state; the rows are unchanged but the tibble now
# carries `state` as its grouping variable.
by_state <- group_by(fulldata, state)
# Typing the name prints the grouped tibble for inspection.
by_state
## # A tibble: 46,630 x 10
## # Groups: state [49]
## state county cases year fips prcp avtemp size log10_size
## <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <int> <dbl>
## 1 Alabama Autauga County 0 2000 1001 959. 18.1 43872 4.64
## 2 Alabama Baldwin County 1 2000 1003 1019. 19.7 141358 5.15
## 3 Alabama Barbour County 0 2000 1005 1006. 18.1 29035 4.46
## 4 Alabama Bibb County 0 2000 1007 994. 17.4 19936 4.30
## 5 Alabama Blount County 0 2000 1009 1179. 16.3 51181 4.71
## 6 Alabama Bullock County 0 2000 1011 1068. 17.7 11604 4.06
## 7 Alabama Butler County 0 2000 1013 1019. 18.7 21313 4.33
## 8 Alabama Calhoun County 0 2000 1015 1004. 16.1 111342 5.05
## 9 Alabama Chambers Coun~ 0 2000 1017 1043. 16.4 36593 4.56
## 10 Alabama Cherokee Coun~ 0 2000 1019 1146. 15.6 24053 4.38
## # ... with 46,620 more rows, and 1 more variable: log10_cases <dbl>
## Task 10: Next, update this new data frame so that it is nested (simply pass it to nest). Again, inspect the
## data frame by typing its name in the console to see how things changed.
# Collapse each state's rows into a single list-column named `data`, giving one
# row per state. With tidyr >= 1.0.0, nest() on a grouped data frame keeps the
# grouping, so drop it explicitly: the result is then the plain
# one-row-per-state tibble shown in the recorded output, and later mutate()
# calls are not evaluated group by group. On older tidyr the ungroup() is a
# harmless no-op.
by_state <- by_state %>%
  nest() %>%
  ungroup()
# Typing the name prints the nested tibble for inspection.
by_state
## # A tibble: 49 x 2
## state data
## <chr> <list>
## 1 Alabama <tibble [1,005 x 9]>
## 2 Arizona <tibble [225 x 9]>
## 3 Arkansas <tibble [1,125 x 9]>
## 4 California <tibble [870 x 9]>
## 5 Colorado <tibble [960 x 9]>
## 6 Connecticut <tibble [120 x 9]>
## 7 Delaware <tibble [45 x 9]>
## 8 District of Columbia <tibble [15 x 9]>
## 9 Florida <tibble [1,005 x 9]>
## 10 Georgia <tibble [2,385 x 9]>
## # ... with 39 more rows
# Pull out the nested tibble stored in row 10 (Georgia in the printout above).
by_state[["data"]][[10]]
## # A tibble: 2,385 x 9
## county cases year fips prcp avtemp size log10_size log10_cases
## <chr> <int> <dbl> <dbl> <dbl> <dbl> <int> <dbl> <dbl>
## 1 Appling Co~ 0 2000 13001 965. 18.6 17408 4.24 0
## 2 Atkinson C~ 0 2000 13003 1055. 18.7 7610 3.88 0
## 3 Bacon Coun~ 0 2000 13005 1054. 18.8 10122 4.01 0
## 4 Baker Coun~ 0 2000 13007 963. 18.9 4053 3.61 0
## 5 Baldwin Co~ 0 2000 13009 866. 17.3 44738 4.65 0
## 6 Banks Coun~ 0 2000 13011 993. 15.6 14504 4.16 0
## 7 Barrow Cou~ 0 2000 13013 979. 15.9 46561 4.67 0
## 8 Bartow Cou~ 0 2000 13015 1104. 15.6 76703 4.88 0
## 9 Ben Hill C~ 0 2000 13017 1102. 18.4 17473 4.24 0
## 10 Berrien Co~ 0 2000 13019 1083. 18.7 16250 4.21 0
## # ... with 2,375 more rows
# Write a function that takes a data frame as its argument and returns a linear model object that predicts size by year.
#' Fit a linear model of population size on year
#'
#' @param df A data frame containing numeric columns `size` and `year`.
#' @return The fitted `lm` object for `size ~ year`.
linearmodel <- function(df) {
  size_by_year_fit <- lm(formula = size ~ year, data = df)
  size_by_year_fit
}
# Fit one size ~ year model per state by mapping linearmodel over the nested
# per-state tibbles; the result is a list of lm objects in row order.
models <- purrr::map(.x = by_state[["data"]], .f = linearmodel)
# Function conflicts: why the `purrr::` prefix? It avoids name conflicts by making sure the `map` we call comes from the purrr package.
# Here `map` applies the function `linearmodel` to each element of `by_state$data`.
# Add a column to the by_state data frame, where each row (state) has its own model object.
# modelr supplies add_residuals(); attach it before it is referenced below.
library(modelr)
# Store each state's fitted model next to its data, then pair data and model
# row by row so add_residuals() appends a `resid` column to every state's tibble.
by_state <- by_state %>%
  mutate(
    model = purrr::map(data, linearmodel),
    resids = purrr::map2(data, model, add_residuals)
  )
# Run these commands and inspect "resids". What is the structure of "resids"?
# Show the structure of the list-column: one tibble per state, each holding the
# original columns plus `resid`.
str(by_state[["resids"]])
## List of 49
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 1005 obs. of 10 variables:
## ..$ county : chr [1:1005] "Autauga County" "Baldwin County" "Barbour County" "Bibb County" ...
## ..$ cases : int [1:1005] 0 1 0 0 0 0 0 0 0 0 ...
## ..$ year : num [1:1005] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:1005] 1001 1003 1005 1007 1009 ...
## ..$ prcp : num [1:1005] 959 1019 1006 994 1179 ...
## ..$ avtemp : num [1:1005] 18.1 19.7 18.1 17.4 16.3 ...
## ..$ size : int [1:1005] 43872 141358 29035 19936 51181 11604 21313 111342 36593 24053 ...
## ..$ log10_size : num [1:1005] 4.64 5.15 4.46 4.3 4.71 ...
## ..$ log10_cases: num [1:1005] 0 0.301 0 0 0 ...
## ..$ resid : num [1:1005] -21995 75491 -36832 -45931 -14686 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 225 obs. of 10 variables:
## ..$ county : chr [1:225] "Apache County" "Cochise County" "Coconino County" "Gila County" ...
## ..$ cases : int [1:225] 0 0 0 0 0 0 0 1 0 0 ...
## ..$ year : num [1:225] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:225] 4001 4003 4005 4007 4009 ...
## ..$ prcp : num [1:225] 294 482 317 474 398 ...
## ..$ avtemp : num [1:225] 11.2 16.7 12.2 15.6 16.3 ...
## ..$ size : int [1:225] 69516 118028 116717 51355 33541 8542 19643 3097378 156194 97876 ...
## ..$ log10_size : num [1:225] 4.84 5.07 5.07 4.71 4.53 ...
## ..$ log10_cases: num [1:225] 0 0 0 0 0 ...
## ..$ resid : num [1:225] -285580 -237068 -238379 -303741 -321555 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 1125 obs. of 10 variables:
## ..$ county : chr [1:1125] "Arkansas County" "Ashley County" "Baxter County" "Benton County" ...
## ..$ cases : int [1:1125] 0 0 0 1 0 0 0 0 0 0 ...
## ..$ year : num [1:1125] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:1125] 5001 5003 5005 5007 5009 ...
## ..$ prcp : num [1:1125] 1037 1336 996 1133 985 ...
## ..$ avtemp : num [1:1125] 17.2 17.4 14.6 14.1 14.4 ...
## ..$ size : int [1:1125] 20672 24160 38465 154791 34051 12603 5717 25411 14083 23508 ...
## ..$ log10_size : num [1:1125] 4.32 4.38 4.59 5.19 4.53 ...
## ..$ log10_cases: num [1:1125] 0 0 0 0.301 0 ...
## ..$ resid : num [1:1125] -14905 -11417 2888 119214 -1526 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 870 obs. of 10 variables:
## ..$ county : chr [1:870] "Alameda County" "Alpine County" "Amador County" "Butte County" ...
## ..$ cases : int [1:870] 4 0 0 3 1 0 1 0 0 1 ...
## ..$ year : num [1:870] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:870] 6001 6003 6005 6007 6009 ...
## ..$ prcp : num [1:870] 546 1014 1049 1166 1056 ...
## ..$ avtemp : num [1:870] 14.98 5.77 14.04 15.06 14.57 ...
## ..$ size : int [1:870] 1450220 1205 35173 203926 40696 18829 953192 27471 157134 801444 ...
## ..$ log10_size : num [1:870] 6.16 3.08 4.55 5.31 4.61 ...
## ..$ log10_cases: num [1:870] 0.699 0 0 0.602 0.301 ...
## ..$ resid : num [1:870] 861952 -587063 -553095 -384342 -547572 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 960 obs. of 10 variables:
## ..$ county : chr [1:960] "Adams County" "Alamosa County" "Arapahoe County" "Archuleta County" ...
## ..$ cases : int [1:960] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ year : num [1:960] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:960] 8001 8003 8005 8007 8009 ...
## ..$ prcp : num [1:960] 338 239 402 576 363 ...
## ..$ avtemp : num [1:960] 10.56 6.48 10.32 6.7 12.56 ...
## ..$ size : int [1:960] 350961 14964 491701 10038 4501 5968 271669 39563 16315 2218 ...
## ..$ log10_size : num [1:960] 5.55 4.18 5.69 4 3.65 ...
## ..$ log10_cases: num [1:960] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ resid : num [1:960] 283211 -52786 423951 -57712 -63249 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 120 obs. of 10 variables:
## ..$ county : chr [1:120] "Fairfield County" "Hartford County" "Litchfield County" "Middlesex County" ...
## ..$ cases : int [1:120] 1342 231 457 191 442 439 201 359 1146 269 ...
## ..$ year : num [1:120] 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:120] 9001 9003 9005 9007 9009 ...
## ..$ prcp : num [1:120] 1243 1227 1295 1276 1284 ...
## ..$ avtemp : num [1:120] 9.9 9.39 8.2 9.66 9.74 ...
## ..$ size : int [1:120] 884420 858422 182628 155633 824992 259575 136860 109196 888036 861183 ...
## ..$ log10_size : num [1:120] 5.95 5.93 5.26 5.19 5.92 ...
## ..$ log10_cases: num [1:120] 3.13 2.37 2.66 2.28 2.65 ...
## ..$ resid : num [1:120] 457917 431919 -243875 -270870 398489 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 45 obs. of 10 variables:
## ..$ county : chr [1:45] "Kent County" "New Castle County" "Sussex County" "Kent County" ...
## ..$ cases : int [1:45] 25 121 21 22 102 28 33 132 29 33 ...
## ..$ year : num [1:45] 2000 2000 2000 2001 2001 ...
## ..$ fips : num [1:45] 10001 10003 10005 10001 10003 ...
## ..$ prcp : num [1:45] 1220 1201 1184 1014 934 ...
## ..$ avtemp : num [1:45] 12.7 12.3 13.1 13.4 13.1 ...
## ..$ size : int [1:45] 127109 501913 157389 128821 505564 160235 131301 509113 163717 134222 ...
## ..$ log10_size : num [1:45] 5.1 5.7 5.2 5.11 5.7 ...
## ..$ log10_cases: num [1:45] 1.41 2.09 1.34 1.36 2.01 ...
## ..$ resid : num [1:45] -134446 240358 -104166 -136420 240323 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 15 obs. of 10 variables:
## ..$ county : chr [1:15] "District of Columbia" "District of Columbia" "District of Columbia" "District of Columbia" ...
## ..$ cases : int [1:15] 11 17 25 14 16 10 62 116 74 61 ...
## ..$ year : num [1:15] 2000 2001 2002 2003 2004 ...
## ..$ fips : num [1:15] 11001 11001 11001 11001 11001 ...
## ..$ prcp : num [1:15] 1022 979 967 1713 1061 ...
## ..$ avtemp : num [1:15] 13.5 14.1 14.5 13.1 13.9 ...
## ..$ size : int [1:15] 571744 578042 579585 577777 579796 582049 583978 586409 590074 599657 ...
## ..$ log10_size : num [1:15] 5.76 5.76 5.76 5.76 5.76 ...
## ..$ log10_cases: num [1:15] 1.08 1.26 1.41 1.18 1.23 ...
## ..$ resid : num [1:15] 11938 12515 8337 808 -2894 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 1005 obs. of 10 variables:
## ..$ county : chr [1:1005] "Alachua County" "Baker County" "Bay County" "Bradford County" ...
## ..$ cases : int [1:1005] 1 0 0 0 2 0 0 0 1 1 ...
## ..$ year : num [1:1005] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:1005] 12001 12003 12005 12007 12009 ...
## ..$ prcp : num [1:1005] 998 976 1058 882 894 ...
## ..$ avtemp : num [1:1005] 20.3 19.5 20 19.8 22.1 ...
## ..$ size : int [1:1005] 218602 22373 148258 26078 477735 1631723 13040 142246 118639 141621 ...
## ..$ log10_size : num [1:1005] 5.34 4.35 5.17 4.42 5.68 ...
## ..$ log10_cases: num [1:1005] 0.301 0 0 0 0.477 ...
## ..$ resid : num [1:1005] -23587 -219816 -93931 -216111 235546 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 2385 obs. of 10 variables:
## ..$ county : chr [1:2385] "Appling County" "Atkinson County" "Bacon County" "Baker County" ...
## ..$ cases : int [1:2385] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ year : num [1:2385] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:2385] 13001 13003 13005 13007 13009 ...
## ..$ prcp : num [1:2385] 965 1055 1054 963 866 ...
## ..$ avtemp : num [1:2385] 18.6 18.7 18.8 18.9 17.3 ...
## ..$ size : int [1:2385] 17408 7610 10122 4053 44738 14504 46561 76703 17473 16250 ...
## ..$ log10_size : num [1:2385] 4.24 3.88 4.01 3.61 4.65 ...
## ..$ log10_cases: num [1:2385] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ resid : num [1:2385] -35322 -45120 -42608 -48677 -7992 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 660 obs. of 10 variables:
## ..$ county : chr [1:660] "Ada County" "Adams County" "Bannock County" "Bear Lake County" ...
## ..$ cases : int [1:660] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ year : num [1:660] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:660] 16001 16003 16005 16007 16009 ...
## ..$ prcp : num [1:660] 308 686 421 550 773 ...
## ..$ avtemp : num [1:660] 10.79 6.39 7.83 4.83 6.78 ...
## ..$ size : int [1:660] 303174 3458 75671 6425 9193 41748 19123 6746 36999 82867 ...
## ..$ log10_size : num [1:660] 5.48 3.54 4.88 3.81 3.96 ...
## ..$ log10_cases: num [1:660] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ resid : num [1:660] 273592 -26124 46089 -23157 -20389 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 1530 obs. of 10 variables:
## ..$ county : chr [1:1530] "Adams County" "Alexander County" "Bond County" "Boone County" ...
## ..$ cases : int [1:1530] 1 0 0 0 0 0 0 0 0 0 ...
## ..$ year : num [1:1530] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:1530] 17001 17003 17005 17007 17009 ...
## ..$ prcp : num [1:1530] 816 1159 1155 1090 777 ...
## ..$ avtemp : num [1:1530] 11.46 14.07 12.33 8.89 11.31 ...
## ..$ size : int [1:1530] 68220 9584 17650 42053 6963 35473 5090 16626 13680 179931 ...
## ..$ log10_size : num [1:1530] 4.83 3.98 4.25 4.62 3.84 ...
## ..$ log10_cases: num [1:1530] 0.301 0 0 0 0 ...
## ..$ resid : num [1:1530] -54372 -113008 -104942 -80539 -115629 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 1380 obs. of 10 variables:
## ..$ county : chr [1:1380] "Adams County" "Allen County" "Bartholomew County" "Benton County" ...
## ..$ cases : int [1:1380] 0 2 0 0 0 1 0 0 0 0 ...
## ..$ year : num [1:1380] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:1380] 18001 18003 18005 18007 18009 ...
## ..$ prcp : num [1:1380] 953 979 1260 862 922 ...
## ..$ avtemp : num [1:1380] 10.29 9.84 11.56 10.05 10.31 ...
## ..$ size : int [1:1380] 33612 332744 71737 9396 14006 46380 14987 20147 40965 96749 ...
## ..$ log10_size : num [1:1380] 4.53 5.52 4.86 3.97 4.15 ...
## ..$ log10_cases: num [1:1380] 0 0.477 0 0 0 ...
## ..$ resid : num [1:1380] -32443 266689 5682 -56659 -52049 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 1485 obs. of 10 variables:
## ..$ county : chr [1:1485] "Adair County" "Adams County" "Allamakee County" "Appanoose County" ...
## ..$ cases : int [1:1485] 0 0 1 0 0 1 2 0 0 0 ...
## ..$ year : num [1:1485] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:1485] 19001 19003 19005 19007 19009 ...
## ..$ prcp : num [1:1485] 606 660 968 789 535 ...
## ..$ avtemp : num [1:1485] 10.29 10.11 8.09 10.37 9.39 ...
## ..$ size : int [1:1485] 8208 4475 14671 13701 6807 25338 128024 26247 23282 21079 ...
## ..$ log10_size : num [1:1485] 3.91 3.65 4.17 4.14 3.83 ...
## ..$ log10_cases: num [1:1485] 0 0 0.301 0 0 ...
## ..$ resid : num [1:1485] -21057 -24790 -14594 -15564 -22458 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 1575 obs. of 10 variables:
## ..$ county : chr [1:1575] "Allen County" "Anderson County" "Atchison County" "Barber County" ...
## ..$ cases : int [1:1575] 0 0 2 0 0 0 0 2 0 0 ...
## ..$ year : num [1:1575] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:1575] 20001 20003 20005 20007 20009 ...
## ..$ prcp : num [1:1575] 754 617 695 827 717 ...
## ..$ avtemp : num [1:1575] 13.7 13.6 12.7 14.2 13.3 ...
## ..$ size : int [1:1575] 14386 8090 16765 5287 28129 15382 10711 59675 3029 4347 ...
## ..$ log10_size : num [1:1575] 4.16 3.91 4.22 3.72 4.45 ...
## ..$ log10_cases: num [1:1575] 0 0 0.477 0 0 ...
## ..$ resid : num [1:1575] -11079 -17375 -8700 -20178 2664 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 1800 obs. of 10 variables:
## ..$ county : chr [1:1800] "Adair County" "Allen County" "Anderson County" "Ballard County" ...
## ..$ cases : int [1:1800] 0 1 0 1 0 0 0 0 0 0 ...
## ..$ year : num [1:1800] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:1800] 21001 21003 21005 21007 21009 ...
## ..$ prcp : num [1:1800] 1082 1102 1086 1239 1069 ...
## ..$ avtemp : num [1:1800] 13.7 14.5 12.7 14.1 14.3 ...
## ..$ size : int [1:1800] 17285 17810 19184 8300 38118 11128 30041 87006 19364 49662 ...
## ..$ log10_size : num [1:1800] 4.24 4.25 4.28 3.92 4.58 ...
## ..$ log10_cases: num [1:1800] 0 0.301 0 0.301 0 ...
## ..$ resid : num [1:1800] -16424 -15899 -14525 -25409 4409 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 960 obs. of 10 variables:
## ..$ county : chr [1:960] "Acadia Parish" "Allen Parish" "Ascension Parish" "Assumption Parish" ...
## ..$ cases : int [1:960] 0 0 0 0 0 0 0 1 0 0 ...
## ..$ year : num [1:960] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:960] 22001 22003 22005 22007 22009 ...
## ..$ prcp : num [1:960] 1105 1156 1183 1151 1066 ...
## ..$ avtemp : num [1:960] 20.3 19.9 20.1 20.4 19.6 ...
## ..$ size : int [1:960] 58817 25410 77335 23378 41480 33025 15728 98601 252013 183514 ...
## ..$ log10_size : num [1:960] 4.77 4.41 4.89 4.37 4.62 ...
## ..$ log10_cases: num [1:960] 0 0 0 0 0 ...
## ..$ resid : num [1:960] -9953 -43360 8565 -45392 -27290 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 240 obs. of 10 variables:
## ..$ county : chr [1:240] "Androscoggin County" "Aroostook County" "Cumberland County" "Franklin County" ...
## ..$ cases : int [1:240] 0 0 13 0 0 2 4 3 5 2 ...
## ..$ year : num [1:240] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:240] 23001 23003 23005 23007 23009 ...
## ..$ prcp : num [1:240] 1152 1039 1173 1259 1101 ...
## ..$ avtemp : num [1:240] 6.67 3.72 6.98 3.82 6.35 ...
## ..$ size : int [1:240] 103845 73869 266058 29479 51862 117214 39684 33699 54802 144907 ...
## ..$ log10_size : num [1:240] 5.02 4.87 5.42 4.47 4.71 ...
## ..$ log10_cases: num [1:240] 0 0 1.15 0 0 ...
## ..$ resid : num [1:240] 23329 -6647 185542 -51037 -28654 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 360 obs. of 10 variables:
## ..$ county : chr [1:360] "Allegany County" "Anne Arundel County" "Baltimore County" "Calvert County" ...
## ..$ cases : int [1:360] 1 84 28 27 21 30 42 27 4 26 ...
## ..$ year : num [1:360] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:360] 24001 24003 24005 24009 24011 ...
## ..$ prcp : num [1:360] 903 1209 1088 1180 1175 ...
## ..$ avtemp : num [1:360] 10.7 13.1 12.4 13.5 13 ...
## ..$ size : int [1:360] 74804 491394 756037 75163 29828 151580 86464 121203 30586 196522 ...
## ..$ log10_size : num [1:360] 4.87 5.69 5.88 4.88 4.47 ...
## ..$ log10_cases: num [1:360] 0.301 1.929 1.462 1.447 1.342 ...
## ..$ resid : num [1:360] -147489 269101 533744 -147130 -192465 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 210 obs. of 10 variables:
## ..$ county : chr [1:210] "Barnstable County" "Berkshire County" "Bristol County" "Dukes County" ...
## ..$ cases : int [1:210] 154 51 74 39 204 9 72 34 129 39 ...
## ..$ year : num [1:210] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:210] 25001 25003 25005 25007 25009 ...
## ..$ prcp : num [1:210] 1076 1396 1242 1070 1289 ...
## ..$ avtemp : num [1:210] 9.91 6.66 10.16 9.92 9.31 ...
## ..$ size : int [1:210] 223245 134787 536008 15072 725379 71499 456573 152381 1468934 9574 ...
## ..$ log10_size : num [1:210] 5.35 5.13 5.73 4.18 5.86 ...
## ..$ log10_cases: num [1:210] 2.19 1.72 1.88 1.6 2.31 ...
## ..$ resid : num [1:210] -230962 -319420 81801 -439135 271172 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 1245 obs. of 10 variables:
## ..$ county : chr [1:1245] "Alcona County" "Alger County" "Allegan County" "Alpena County" ...
## ..$ cases : int [1:1245] 0 0 0 1 0 0 0 0 0 0 ...
## ..$ year : num [1:1245] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:1245] 26001 26003 26005 26007 26009 ...
## ..$ prcp : num [1:1245] 732 799 1174 741 732 ...
## ..$ avtemp : num [1:1245] 6.73 5.57 8.94 6.69 7.17 ...
## ..$ size : int [1:1245] 11709 9827 106048 31276 23258 17263 8872 56903 110091 16106 ...
## ..$ log10_size : num [1:1245] 4.07 3.99 5.03 4.5 4.37 ...
## ..$ log10_cases: num [1:1245] 0 0 0 0.301 0 ...
## ..$ resid : num [1:1245] -109651 -111533 -15312 -90084 -98102 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 1305 obs. of 10 variables:
## ..$ county : chr [1:1305] "Aitkin County" "Anoka County" "Becker County" "Beltrami County" ...
## ..$ cases : int [1:1305] 11 58 0 1 6 0 1 0 3 3 ...
## ..$ year : num [1:1305] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:1305] 27001 27003 27005 27007 27009 ...
## ..$ prcp : num [1:1305] 671 649 734 685 613 ...
## ..$ avtemp : num [1:1305] 4.92 7.17 4.91 3.91 6.03 ...
## ..$ size : int [1:1305] 15349 299775 30066 39799 34502 5782 55947 26876 31723 70860 ...
## ..$ log10_size : num [1:1305] 4.19 5.48 4.48 4.6 4.54 ...
## ..$ log10_cases: num [1:1305] 1.079 1.771 0 0.301 0.845 ...
## ..$ resid : num [1:1305] -41371 243055 -26654 -16921 -22218 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 1230 obs. of 10 variables:
## ..$ county : chr [1:1230] "Adams County" "Alcorn County" "Amite County" "Attala County" ...
## ..$ cases : int [1:1230] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ year : num [1:1230] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:1230] 28001 28003 28005 28007 28009 ...
## ..$ prcp : num [1:1230] 1132 1107 1167 1202 986 ...
## ..$ avtemp : num [1:1230] 19 16.1 18.9 17.3 15.9 ...
## ..$ size : int [1:1230] 34214 34606 13578 19643 8024 40470 15052 10773 19408 9748 ...
## ..$ log10_size : num [1:1230] 4.53 4.54 4.13 4.29 3.9 ...
## ..$ log10_cases: num [1:1230] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ resid : num [1:1230] -416.9 -24.9 -21052.9 -14987.9 -26606.9 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 1725 obs. of 10 variables:
## ..$ county : chr [1:1725] "Adair County" "Andrew County" "Atchison County" "Audrain County" ...
## ..$ cases : int [1:1725] 0 0 0 0 0 0 1 0 3 0 ...
## ..$ year : num [1:1725] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:1725] 29001 29003 29005 29007 29009 ...
## ..$ prcp : num [1:1725] 770 795 680 952 1047 ...
## ..$ avtemp : num [1:1725] 11 11.7 11.3 12.2 13.4 ...
## ..$ size : int [1:1725] 24961 16530 6405 25807 34050 12541 16703 17233 12053 135846 ...
## ..$ log10_size : num [1:1725] 4.4 4.22 3.81 4.41 4.53 ...
## ..$ log10_cases: num [1:1725] 0 0 0 0 0 ...
## ..$ resid : num [1:1725] -23987 -32418 -42543 -23141 -14898 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 840 obs. of 10 variables:
## ..$ county : chr [1:840] "Beaverhead County" "Big Horn County" "Blaine County" "Broadwater County" ...
## ..$ cases : int [1:840] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ year : num [1:840] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:840] 30001 30003 30005 30007 30009 ...
## ..$ prcp : num [1:840] 438 362 296 342 425 ...
## ..$ avtemp : num [1:840] 3.35 7.63 5.88 6.02 6.42 ...
## ..$ size : int [1:840] 9187 12659 6973 4366 9564 1345 80201 5975 11667 2012 ...
## ..$ log10_size : num [1:840] 3.96 4.1 3.84 3.64 3.98 ...
## ..$ log10_cases: num [1:840] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ resid : num [1:840] -6769 -3297 -8983 -11590 -6392 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 1395 obs. of 10 variables:
## ..$ county : chr [1:1395] "Adams County" "Antelope County" "Arthur County" "Banner County" ...
## ..$ cases : int [1:1395] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ year : num [1:1395] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:1395] 31001 31003 31005 31007 31009 ...
## ..$ prcp : num [1:1395] 618 564 380 326 527 ...
## ..$ avtemp : num [1:1395] 10.67 9.37 9.6 9.4 9.44 ...
## ..$ size : int [1:1395] 31153 7440 442 823 582 6213 12112 2427 3519 42341 ...
## ..$ log10_size : num [1:1395] 4.49 3.87 2.65 2.92 2.76 ...
## ..$ log10_cases: num [1:1395] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ resid : num [1:1395] 12915 -10798 -17796 -17415 -17656 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 255 obs. of 10 variables:
## ..$ county : chr [1:255] "Churchill County" "Clark County" "Douglas County" "Elko County" ...
## ..$ cases : int [1:255] 1 3 0 0 0 0 0 0 0 0 ...
## ..$ year : num [1:255] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:255] 32001 32003 32005 32007 32009 ...
## ..$ prcp : num [1:255] 178 138 335 321 139 ...
## ..$ avtemp : num [1:255] 11.2 18.29 8.87 7.51 11.7 ...
## ..$ size : int [1:255] 24008 1393370 41429 45236 972 1627 15903 5694 4172 34841 ...
## ..$ log10_size : num [1:255] 4.38 6.14 4.62 4.66 2.99 ...
## ..$ log10_cases: num [1:255] 0.301 0.602 0 0 0 ...
## ..$ resid : num [1:255] -98298 1271064 -80877 -77070 -121334 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 150 obs. of 10 variables:
## ..$ county : chr [1:150] "Belknap County" "Carroll County" "Cheshire County" "Coos County" ...
## ..$ cases : int [1:150] 5 2 7 3 4 14 8 31 10 0 ...
## ..$ year : num [1:150] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:150] 33001 33003 33005 33007 33009 ...
## ..$ prcp : num [1:150] 1150 1272 1256 1332 1259 ...
## ..$ avtemp : num [1:150] 6.87 5.95 7 3.61 4.96 ...
## ..$ size : int [1:150] 56584 43910 73984 33143 81797 382409 136648 278733 112686 40552 ...
## ..$ log10_size : num [1:150] 4.75 4.64 4.87 4.52 4.91 ...
## ..$ log10_cases: num [1:150] 0.778 0.477 0.903 0.602 0.699 ...
## ..$ resid : num [1:150] -69781 -82455 -52381 -93222 -44568 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 315 obs. of 10 variables:
## ..$ county : chr [1:315] "Atlantic County" "Bergen County" "Burlington County" "Camden County" ...
## ..$ cases : int [1:315] 90 104 22 11 15 32 58 101 14 538 ...
## ..$ year : num [1:315] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:315] 34001 34003 34005 34007 34009 ...
## ..$ prcp : num [1:315] 1111 1165 1148 1198 1140 ...
## ..$ avtemp : num [1:315] 12.1 11 12 12.4 12.5 ...
## ..$ size : int [1:315] 253038 885329 424547 507648 102307 146362 792302 256340 609422 122553 ...
## ..$ log10_size : num [1:315] 5.4 5.95 5.63 5.71 5.01 ...
## ..$ log10_cases: num [1:315] 1.96 2.02 1.36 1.08 1.2 ...
## ..$ resid : num [1:315] -149174 483117 22335 105436 -299905 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 495 obs. of 10 variables:
## ..$ county : chr [1:495] "Bernalillo County" "Catron County" "Chaves County" "Cibola County" ...
## ..$ cases : int [1:495] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ year : num [1:495] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:495] 35001 35003 35005 35006 35007 ...
## ..$ prcp : num [1:495] 328 379 291 302 419 ...
## ..$ avtemp : num [1:495] 12.59 9.93 15.75 10.3 9.08 ...
## ..$ size : int [1:495] 557158 3566 61297 25644 14206 44901 2214 174973 51419 30892 ...
## ..$ log10_size : num [1:495] 5.75 3.55 4.79 4.41 4.15 ...
## ..$ log10_cases: num [1:495] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ resid : num [1:495] 502257 -51335 6396 -29257 -40695 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 930 obs. of 10 variables:
## ..$ county : chr [1:930] "Albany County" "Allegany County" "Bronx County" "Broome County" ...
## ..$ cases : int [1:930] 57 0 14 5 0 3 4 1 3 3 ...
## ..$ year : num [1:930] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:930] 36001 36003 36005 36007 36009 ...
## ..$ prcp : num [1:930] 1165 1009 1217 1157 1095 ...
## ..$ avtemp : num [1:930] 7.51 6.85 12.07 7.37 6.81 ...
## ..$ size : int [1:930] 294960 49916 1333854 200319 84017 81925 139659 91079 51381 79897 ...
## ..$ log10_size : num [1:930] 5.47 4.7 6.13 5.3 4.92 ...
## ..$ log10_cases: num [1:930] 1.763 0 1.176 0.778 0 ...
## ..$ resid : num [1:930] -12517 -257561 1026377 -107158 -223460 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 1500 obs. of 10 variables:
## ..$ county : chr [1:1500] "Alamance County" "Alexander County" "Alleghany County" "Anson County" ...
## ..$ cases : int [1:1500] 1 0 0 0 0 0 0 0 0 0 ...
## ..$ year : num [1:1500] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:1500] 37001 37003 37005 37007 37009 ...
## ..$ prcp : num [1:1500] 1010 857 996 1056 992 ...
## ..$ avtemp : num [1:1500] 14.6 14.1 10.3 16 10.1 ...
## ..$ size : int [1:1500] 131521 33674 10687 25310 24449 17328 44997 19715 32240 73756 ...
## ..$ log10_size : num [1:1500] 5.12 4.53 4.03 4.4 4.39 ...
## ..$ log10_cases: num [1:1500] 0.301 0 0 0 0 ...
## ..$ resid : num [1:1500] 51147 -46700 -69687 -55064 -55925 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 795 obs. of 10 variables:
## ..$ county : chr [1:795] "Adams County" "Barnes County" "Benson County" "Billings County" ...
## ..$ cases : int [1:795] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ year : num [1:795] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:795] 38001 38003 38005 38007 38009 ...
## ..$ prcp : num [1:795] 440 569 654 341 559 ...
## ..$ avtemp : num [1:795] 6.51 5.24 4.54 5.96 4.1 ...
## ..$ size : int [1:795] 2576 11704 6957 877 7119 3229 2232 69530 123500 4803 ...
## ..$ log10_size : num [1:795] 3.41 4.07 3.84 2.94 3.85 ...
## ..$ log10_cases: num [1:795] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ resid : num [1:795] -8982 146 -4601 -10681 -4439 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 1320 obs. of 10 variables:
## ..$ county : chr [1:1320] "Adams County" "Allen County" "Ashland County" "Ashtabula County" ...
## ..$ cases : int [1:1320] 1 1 0 4 0 0 1 0 6 0 ...
## ..$ year : num [1:1320] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:1320] 39001 39003 39005 39007 39009 ...
## ..$ prcp : num [1:1320] 1203 919 1041 1017 1004 ...
## ..$ avtemp : num [1:1320] 11.56 10.17 9.13 9.06 10.77 ...
## ..$ size : int [1:1320] 27337 108577 52584 102745 62323 46589 70124 42583 333695 28869 ...
## ..$ log10_size : num [1:1320] 4.44 5.04 4.72 5.01 4.79 ...
## ..$ log10_cases: num [1:1320] 0.301 0.301 0 0.699 0 ...
## ..$ resid : num [1:1320] -102142 -20902 -76895 -26734 -67156 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 1155 obs. of 10 variables:
## ..$ county : chr [1:1155] "Adair County" "Alfalfa County" "Atoka County" "Beaver County" ...
## ..$ cases : int [1:1155] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ year : num [1:1155] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:1155] 40001 40003 40005 40007 40009 ...
## ..$ prcp : num [1:1155] 1276 808 983 509 723 ...
## ..$ avtemp : num [1:1155] 14.8 15 16.9 14 15.5 ...
## ..$ size : int [1:1155] 21050 6080 13840 5792 19744 11957 36597 30098 88144 45591 ...
## ..$ log10_size : num [1:1155] 4.32 3.78 4.14 3.76 4.3 ...
## ..$ log10_cases: num [1:1155] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ resid : num [1:1155] -23194 -38164 -30404 -38452 -24500 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 540 obs. of 10 variables:
## ..$ county : chr [1:540] "Baker County" "Benton County" "Clackamas County" "Clatsop County" ...
## ..$ cases : int [1:540] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ year : num [1:540] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:540] 41001 41003 41005 41007 41009 ...
## ..$ prcp : num [1:540] 481 1227 1544 2206 1137 ...
## ..$ avtemp : num [1:540] 7.21 10.78 8.98 9.57 9.51 ...
## ..$ size : int [1:540] 16717 78197 339546 35571 43657 62668 19318 21104 116618 100423 ...
## ..$ log10_size : num [1:540] 4.22 4.89 5.53 4.55 4.64 ...
## ..$ log10_cases: num [1:540] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ resid : num [1:540] -78749 -17269 244080 -59895 -51809 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 1005 obs. of 10 variables:
## ..$ county : chr [1:1005] "Adams County" "Allegheny County" "Armstrong County" "Beaver County" ...
## ..$ cases : int [1:1005] 24 10 1 2 5 51 7 2 506 0 ...
## ..$ year : num [1:1005] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:1005] 42001 42003 42005 42007 42009 ...
## ..$ prcp : num [1:1005] 1079 936 957 917 914 ...
## ..$ avtemp : num [1:1005] 10.77 10.66 9.57 10.25 9.6 ...
## ..$ size : int [1:1005] 91594 1279927 72314 181113 50022 374521 129030 62760 599454 174519 ...
## ..$ log10_size : num [1:1005] 4.96 6.11 4.86 5.26 4.7 ...
## ..$ log10_cases: num [1:1005] 1.398 1.041 0.301 0.477 0.778 ...
## ..$ resid : num [1:1005] -91175 1097158 -110455 -1656 -132747 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 75 obs. of 10 variables:
## ..$ county : chr [1:75] "Bristol County" "Kent County" "Newport County" "Providence County" ...
## ..$ cases : int [1:75] 5 120 29 119 378 4 98 18 88 280 ...
## ..$ year : num [1:75] 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:75] 44001 44003 44005 44007 44009 ...
## ..$ prcp : num [1:75] 1228 1223 1231 1238 1296 ...
## ..$ avtemp : num [1:75] 10.67 9.87 10.42 9.48 9.96 ...
## ..$ size : int [1:75] 50723 167484 85657 622881 123991 51001 168686 85442 627678 125244 ...
## ..$ log10_size : num [1:75] 4.71 5.22 4.93 5.79 5.09 ...
## ..$ log10_cases: num [1:75] 0.778 2.083 1.477 2.079 2.579 ...
## ..$ resid : num [1:75] -162026 -45265 -127092 410132 -88758 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 690 obs. of 10 variables:
## ..$ county : chr [1:690] "Abbeville County" "Aiken County" "Allendale County" "Anderson County" ...
## ..$ cases : int [1:690] 0 1 0 0 0 0 3 0 0 3 ...
## ..$ year : num [1:690] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:690] 45001 45003 45005 45007 45009 ...
## ..$ prcp : num [1:690] 944 1061 966 962 1061 ...
## ..$ avtemp : num [1:690] 15.8 17.2 17.7 15.9 17.5 ...
## ..$ size : int [1:690] 26226 142726 11200 166319 16609 23446 122077 143131 15224 311047 ...
## ..$ log10_size : num [1:690] 4.42 5.15 4.05 5.22 4.22 ...
## ..$ log10_cases: num [1:690] 0 0.301 0 0 0 ...
## ..$ resid : num [1:690] -60455 56045 -75481 79638 -70072 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 990 obs. of 10 variables:
## ..$ county : chr [1:990] "Aurora County" "Beadle County" "Bennett County" "Bon Homme County" ...
## ..$ cases : int [1:990] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ year : num [1:990] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:990] 46003 46005 46007 46009 46011 ...
## ..$ prcp : num [1:990] 443 500 470 508 570 ...
## ..$ avtemp : num [1:990] 8.99 7.86 9.01 9.62 6.75 ...
## ..$ size : int [1:990] 3060 16983 3573 7248 28295 35382 5351 2008 9111 1780 ...
## ..$ log10_size : num [1:990] 3.49 4.23 3.55 3.86 4.45 ...
## ..$ log10_cases: num [1:990] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ resid : num [1:990] -8278 5645 -7765 -4090 16957 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 1425 obs. of 10 variables:
## ..$ county : chr [1:1425] "Anderson County" "Bedford County" "Benton County" "Bledsoe County" ...
## ..$ cases : int [1:1425] 0 0 0 0 0 1 0 0 1 0 ...
## ..$ year : num [1:1425] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:1425] 47001 47003 47005 47007 47009 ...
## ..$ prcp : num [1:1425] 1318 1314 989 1306 1263 ...
## ..$ avtemp : num [1:1425] 13.6 14.7 14.8 13.5 13.7 ...
## ..$ size : int [1:1425] 71229 37822 16535 12414 106212 88203 39861 12906 29477 56834 ...
## ..$ log10_size : num [1:1425] 4.85 4.58 4.22 4.09 5.03 ...
## ..$ log10_cases: num [1:1425] 0 0 0 0 0 ...
## ..$ resid : num [1:1425] 11310 -22097 -43384 -47505 46293 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 3810 obs. of 10 variables:
## ..$ county : chr [1:3810] "Anderson County" "Andrews County" "Angelina County" "Aransas County" ...
## ..$ cases : int [1:3810] 1 0 1 0 0 0 0 1 0 0 ...
## ..$ year : num [1:3810] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:3810] 48001 48003 48005 48007 48009 ...
## ..$ prcp : num [1:3810] 1213 282 1372 806 687 ...
## ..$ avtemp : num [1:3810] 19.6 18.2 19.5 22.5 18.1 ...
## ..$ size : int [1:3810] 55068 12911 80245 22537 8917 2166 38874 23745 6583 17811 ...
## ..$ log10_size : num [1:3810] 4.74 4.11 4.9 4.35 3.95 ...
## ..$ log10_cases: num [1:3810] 0.301 0 0.301 0 0 ...
## ..$ resid : num [1:3810] -26813 -68970 -1636 -59344 -72964 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 435 obs. of 10 variables:
## ..$ county : chr [1:435] "Beaver County" "Box Elder County" "Cache County" "Carbon County" ...
## ..$ cases : int [1:435] 0 0 0 0 0 1 0 0 0 0 ...
## ..$ year : num [1:435] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:435] 49001 49003 49005 49007 49009 ...
## ..$ prcp : num [1:435] 376 304 649 424 394 ...
## ..$ avtemp : num [1:435] 10.05 9.88 6.91 7.84 5.7 ...
## ..$ size : int [1:435] 6016 42872 91872 20354 928 240293 14371 10946 4748 8399 ...
## ..$ log10_size : num [1:435] 3.78 4.63 4.96 4.31 2.97 ...
## ..$ log10_cases: num [1:435] 0 0 0 0 0 ...
## ..$ resid : num [1:435] -71580 -34724 14276 -57242 -76668 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 210 obs. of 10 variables:
## ..$ county : chr [1:210] "Addison County" "Bennington County" "Caledonia County" "Chittenden County" ...
## ..$ cases : int [1:210] 3 10 0 3 1 2 0 0 3 0 ...
## ..$ year : num [1:210] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:210] 50001 50003 50005 50007 50009 ...
## ..$ prcp : num [1:210] 1144 1655 1171 1089 1268 ...
## ..$ avtemp : num [1:210] 6.16 5.67 4.35 6.3 3.99 ...
## ..$ size : int [1:210] 36025 36976 29728 147070 6456 45581 6939 23333 28307 26335 ...
## ..$ log10_size : num [1:210] 4.56 4.57 4.47 5.17 3.81 ...
## ..$ log10_cases: num [1:210] 0.602 1.041 0 0.602 0.301 ...
## ..$ resid : num [1:210] -7698 -6747 -13995 103347 -37267 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 2005 obs. of 10 variables:
## ..$ county : chr [1:2005] "Accomack County" "Albemarle County" "Alleghany County" "Amelia County" ...
## ..$ cases : int [1:2005] 2 3 0 0 0 0 5 1 0 3 ...
## ..$ year : num [1:2005] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:2005] 51001 51003 51005 51007 51009 ...
## ..$ prcp : num [1:2005] 1070 919 991 1034 1015 ...
## ..$ avtemp : num [1:2005] 14 12.9 10.9 13.7 12.5 ...
## ..$ size : int [1:2005] 38365 84622 17200 11478 31882 13691 189527 65772 5032 60606 ...
## ..$ log10_size : num [1:2005] 4.58 4.93 4.24 4.06 4.5 ...
## ..$ log10_cases: num [1:2005] 0.477 0.602 0 0 0 ...
## ..$ resid : num [1:2005] -14530 31727 -35695 -41417 -21013 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 585 obs. of 10 variables:
## ..$ county : chr [1:585] "Adams County" "Asotin County" "Benton County" "Chelan County" ...
## ..$ cases : int [1:585] 0 0 0 0 1 0 0 0 0 0 ...
## ..$ year : num [1:585] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:585] 53001 53003 53005 53007 53009 ...
## ..$ prcp : num [1:585] 276 425 235 831 1659 ...
## ..$ avtemp : num [1:585] 9.28 9.05 10.93 5.12 8.29 ...
## ..$ size : int [1:585] 16450 20555 143108 66688 64285 347525 4071 93012 32691 7276 ...
## ..$ log10_size : num [1:585] 4.22 4.31 5.16 4.82 4.81 ...
## ..$ log10_cases: num [1:585] 0 0 0 0 0.301 ...
## ..$ resid : num [1:585] -134316 -130211 -7658 -84078 -86481 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 825 obs. of 10 variables:
## ..$ county : chr [1:825] "Barbour County" "Berkeley County" "Boone County" "Braxton County" ...
## ..$ cases : int [1:825] 0 12 0 0 1 0 1 0 0 0 ...
## ..$ year : num [1:825] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:825] 54001 54003 54005 54007 54009 ...
## ..$ prcp : num [1:825] 1156 921 1134 1146 975 ...
## ..$ avtemp : num [1:825] 10.2 11.8 12.1 11.4 10.5 ...
## ..$ size : int [1:825] 15552 76414 25491 14720 25469 96709 7581 10315 7408 47498 ...
## ..$ log10_size : num [1:825] 4.19 4.88 4.41 4.17 4.41 ...
## ..$ log10_cases: num [1:825] 0 1.114 0 0 0.301 ...
## ..$ resid : num [1:825] -16983 43879 -7044 -17815 -7066 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 1080 obs. of 10 variables:
## ..$ county : chr [1:1080] "Adams County" "Ashland County" "Barron County" "Bayfield County" ...
## ..$ cases : int [1:1080] 1 4 21 5 0 5 21 1 29 8 ...
## ..$ year : num [1:1080] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:1080] 55001 55003 55005 55007 55009 ...
## ..$ prcp : num [1:1080] 883 798 871 750 802 ...
## ..$ avtemp : num [1:1080] 7.39 4.7 6 5.15 7.01 ...
## ..$ size : int [1:1080] 20000 16854 45026 15032 227338 13807 15714 40728 55324 33629 ...
## ..$ log10_size : num [1:1080] 4.3 4.23 4.65 4.18 5.36 ...
## ..$ log10_cases: num [1:1080] 0.301 0.699 1.342 0.778 0 ...
## ..$ resid : num [1:1080] -54912 -58058 -29886 -59880 152426 ...
## $ :Classes 'tbl_df', 'tbl' and 'data.frame': 345 obs. of 10 variables:
## ..$ county : chr [1:345] "Albany County" "Big Horn County" "Campbell County" "Carbon County" ...
## ..$ cases : int [1:345] 1 0 0 0 0 0 0 0 0 1 ...
## ..$ year : num [1:345] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## ..$ fips : num [1:345] 56001 56003 56005 56007 56009 ...
## ..$ prcp : num [1:345] 377 277 298 376 364 ...
## ..$ avtemp : num [1:345] 5.05 6.31 7.53 5.67 7.14 ...
## ..$ size : int [1:345] 31914 11411 33937 15579 12097 5897 35817 12544 4865 7101 ...
## ..$ log10_size : num [1:345] 4.5 4.06 4.53 4.19 4.08 ...
## ..$ log10_cases: num [1:345] 0.301 0 0 0 0 ...
## ..$ resid : num [1:345] 11088 -9415 13111 -5247 -8729 ...
## resids is nested in by_state (same level as data and model)
Write a function that accepts an object of the type in the resids list, and returns a sum of the absolute values, i.e. ignoring sign: abs(3)+abs(-2)=5. Use the function to add a column called totalResid to by_state that provides the total size of residuals summed over counties and years.
# Total absolute residual for one element of the resids list-column.
# x: an object with a numeric `resid` component (one state's data frame).
# Returns a single number: the sum of |resid| over all rows, sign ignored.
sum_resids <- function(x) {
  magnitudes <- abs(x$resid)
  sum(magnitudes)
}
# Apply sum_resids() to every element of the resids list-column and store the
# results in a new totalResid column. purrr::map() returns a list, so
# totalResid is itself a list-column holding one number per state.
by_state <- by_state %>%
  mutate(totalResid = purrr::map(resids, sum_resids))
Write a function that accepts a linear model and returns the slope (model M has slope M$coefficients[2]) and then use this function to create a new column called slope in the by_state dataframe, that is the slope for each state.
# Slope of a fitted linear model.
# x: one element of by_state$model, i.e. any object with a `coefficients`
#    vector whose second entry is the slope.
# Returns that second coefficient as a length-one vector (its name is kept).
slope <- function(x) {
  x$coefficients[2]
}
# Add a slope list-column by applying slope() to each state's model.
# NOTE(review): `slope` names both the new column and the helper function;
# once the column exists, a re-run of this step would presumably resolve
# `slope` to the column inside mutate() -- rebuild by_state before re-running.
by_state <- by_state %>%
  mutate(slope = purrr::map(model, slope))
# Flatten each list-column into a plain column, one data frame per quantity,
# so the values can be plotted directly.
slopes <- by_state %>% unnest(slope)
totalResids <- by_state %>% unnest(totalResid)
Plot the growth rate (slope value) for all states.
# Scatter plot with one point per state showing its fitted slope.
# State names on the x axis are rotated 90 degrees so they do not overlap.
plot_growth_bystate <- ggplot(slopes, aes(x = state, y = slope)) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
plot_growth_bystate
Plot the total residuals for all states.
# Scatter plot with one point per state showing its total absolute residual.
# This reuses the name plot_growth_bystate, so it replaces whatever plot
# object was stored under that name before.
plot_growth_bystate <- ggplot(totalResids, aes(x = state, y = totalResid)) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
plot_growth_bystate
Repeat Tasks 9 and 10 using a different data frame name, by_state2.
#Grouped data frames versus nested data frames
#Task 9: Create a data frame called "by_state2" from the main data frame, that groups by state, and inspect it.
# Group the full data set by state (the rows themselves are unchanged; only
# grouping metadata is added), then print the result to inspect it.
by_state2 <- group_by(fulldata, state)
by_state2
## # A tibble: 46,630 x 10
## # Groups: state [49]
## state county cases year fips prcp avtemp size log10_size
## <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <int> <dbl>
## 1 Alabama Autauga County 0 2000 1001 959. 18.1 43872 4.64
## 2 Alabama Baldwin County 1 2000 1003 1019. 19.7 141358 5.15
## 3 Alabama Barbour County 0 2000 1005 1006. 18.1 29035 4.46
## 4 Alabama Bibb County 0 2000 1007 994. 17.4 19936 4.30
## 5 Alabama Blount County 0 2000 1009 1179. 16.3 51181 4.71
## 6 Alabama Bullock County 0 2000 1011 1068. 17.7 11604 4.06
## 7 Alabama Butler County 0 2000 1013 1019. 18.7 21313 4.33
## 8 Alabama Calhoun County 0 2000 1015 1004. 16.1 111342 5.05
## 9 Alabama Chambers Coun~ 0 2000 1017 1043. 16.4 36593 4.56
## 10 Alabama Cherokee Coun~ 0 2000 1019 1146. 15.6 24053 4.38
## # ... with 46,620 more rows, and 1 more variable: log10_cases <dbl>
##Task 10: Next, update this new data frame so that it is nested (simply pass it to nest). Again, inspect the
##data frame by typing its name in the console to see how things changed.
# Collapse each state's rows into a single tibble stored in the `data`
# list-column; nest() is applied to the grouped frame, so `state` is the key.
# ungroup() afterwards: recent tidyr versions keep the nested result grouped
# by state, while older ones return it ungrouped. Dropping the groups
# explicitly yields the same plain tibble either way (ungroup() is a no-op on
# a frame that has no groups), so later mutate()/arrange() steps are not
# silently evaluated per group.
by_state2 %<>%
  nest() %>%
  ungroup()
# Print to inspect the nested structure: one row per state.
by_state2
## # A tibble: 49 x 2
## state data
## <chr> <list>
## 1 Alabama <tibble [1,005 x 9]>
## 2 Arizona <tibble [225 x 9]>
## 3 Arkansas <tibble [1,125 x 9]>
## 4 California <tibble [870 x 9]>
## 5 Colorado <tibble [960 x 9]>
## 6 Connecticut <tibble [120 x 9]>
## 7 Delaware <tibble [45 x 9]>
## 8 District of Columbia <tibble [15 x 9]>
## 9 Florida <tibble [1,005 x 9]>
## 10 Georgia <tibble [2,385 x 9]>
## # ... with 39 more rows
# Inspect the nested tibble stored in the 10th row of the data list-column.
by_state2[["data"]][[10]]
## # A tibble: 2,385 x 9
## county cases year fips prcp avtemp size log10_size log10_cases
## <chr> <int> <dbl> <dbl> <dbl> <dbl> <int> <dbl> <dbl>
## 1 Appling Co~ 0 2000 13001 965. 18.6 17408 4.24 0
## 2 Atkinson C~ 0 2000 13003 1055. 18.7 7610 3.88 0
## 3 Bacon Coun~ 0 2000 13005 1054. 18.8 10122 4.01 0
## 4 Baker Coun~ 0 2000 13007 963. 18.9 4053 3.61 0
## 5 Baldwin Co~ 0 2000 13009 866. 17.3 44738 4.65 0
## 6 Banks Coun~ 0 2000 13011 993. 15.6 14504 4.16 0
## 7 Barrow Cou~ 0 2000 13013 979. 15.9 46561 4.67 0
## 8 Bartow Cou~ 0 2000 13015 1104. 15.6 76703 4.88 0
## 9 Ben Hill C~ 0 2000 13017 1102. 18.4 17473 4.24 0
## 10 Berrien Co~ 0 2000 13019 1083. 18.7 16250 4.21 0
## # ... with 2,375 more rows
Write a function that accepts an element of the by_state2$data list-column and returns the Spearman correlation coefficient between Lyme disease cases and precipitation.
# Spearman correlation between Lyme disease cases and precipitation.
# df: one element of by_state2$data, i.e. a data frame with `cases` and
#     `prcp` columns.
# Returns the `estimate` component of cor.test() (a length-one numeric named
# "rho"). Warnings raised by cor.test() are suppressed.
Corr <- function(df) {
  spearman_test <- suppressWarnings(
    cor.test(df$cases, df$prcp, method = "spearman")
  )
  spearman_test$estimate
}
# Compute the per-state Spearman coefficient as a list-column on by_state2.
by_state2 <- by_state2 %>%
  mutate(corr_coef = purrr::map(data, Corr))
# Flatten the coefficients into a plain column and sort states from the
# highest coefficient to the lowest.
spcor <- by_state2 %>%
  unnest(corr_coef) %>%
  arrange(desc(corr_coef))
# Fix the factor levels in the sorted row order so the x axis follows the
# ranking instead of alphabetical order.
spcor$state <- factor(spcor$state, levels = unique(spcor$state))
# One point per state; x-axis labels rotated 90 degrees for readability.
myplot <- spcor %>%
  ggplot(aes(state, corr_coef)) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Render the static ggplot as an interactive plotly widget.
ggplotly(myplot)